Initialize: load libraries and custom functions

setwd("~/google_drive/ImmgenT/jamboree2/immgent_jamboree2_git/")

libs = c("Seurat", "ggplot2", "viridis", "pheatmap", "reshape2", "dplyr") 
sapply(libs, function(x) suppressMessages(suppressWarnings(library(x, character.only = TRUE, quietly = T, warn.conflicts  = F))))
## $Seurat
##  [1] "Seurat"       "SeuratObject" "sp"           "stats"        "graphics"    
##  [6] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## $ggplot2
##  [1] "ggplot2"      "Seurat"       "SeuratObject" "sp"           "stats"       
##  [6] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [11] "base"        
## 
## $viridis
##  [1] "viridis"      "viridisLite"  "ggplot2"      "Seurat"       "SeuratObject"
##  [6] "sp"           "stats"        "graphics"     "grDevices"    "utils"       
## [11] "datasets"     "methods"      "base"        
## 
## $pheatmap
##  [1] "pheatmap"     "viridis"      "viridisLite"  "ggplot2"      "Seurat"      
##  [6] "SeuratObject" "sp"           "stats"        "graphics"     "grDevices"   
## [11] "utils"        "datasets"     "methods"      "base"        
## 
## $reshape2
##  [1] "reshape2"     "pheatmap"     "viridis"      "viridisLite"  "ggplot2"     
##  [6] "Seurat"       "SeuratObject" "sp"           "stats"        "graphics"    
## [11] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## $dplyr
##  [1] "dplyr"        "reshape2"     "pheatmap"     "viridis"      "viridisLite" 
##  [6] "ggplot2"      "Seurat"       "SeuratObject" "sp"           "stats"       
## [11] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [16] "base"
#options(Seurat.object.assay.version = 'v5') #if using V5

#mypal = c('lightgrey','blue','green','orange','red','yellow','lightsalmon','orchid4','pink3','gold4', 'yellowgreen','cyan4','brown','thistle3','tomato3','orange2','mediumpurple1')

#Large color palette:
library("pals")
## 
## Attaching package: 'pals'
## The following objects are masked from 'package:viridis':
## 
##     cividis, inferno, magma, plasma, turbo, viridis
## The following objects are masked from 'package:viridisLite':
## 
##     cividis, inferno, magma, plasma, turbo, viridis
library("RColorBrewer")
n = 70
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
mypal1 = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
mypal1 = mypal1[-4]
parade = function(n) { return(Seurat::DiscretePalette(n, palette = "parade", shuffle = F)) }
length(glasbey())
## [1] 32
length(polychrome())
## [1] 36
mypal = c(glasbey(), polychrome(), mypal1)
names(mypal) = NULL

Transfer metadata from your previous object to the integrated

# Load in your original small dataset
orig_sc =  readRDS('dataset_clean.Rds') #~/google_drive/ImmgenT/exp_id/20220912_exp_id_16_CBDM_scurfy/IGT24/dataset_clean.Rds
orig_sc$orig_RNA_clusters = orig_sc$RNA_clusters

# Load in the Integrated dataset
integrated_sc = readRDS('IGT24_withsketch_SeuratV4.Rds')
orig_sc$RNA_clusters = as.character(orig_sc$RNA_clusters)

integrated_sc$orig_RNA_clusters = NA
all(colnames(integrated_sc)[match(colnames(orig_sc), colnames(integrated_sc))] == colnames(orig_sc))
## [1] TRUE
integrated_sc$orig_RNA_clusters[match(colnames(orig_sc), colnames(integrated_sc))] = orig_sc$RNA_clusters

DimPlot(integrated_sc, reduction = "umap_totalvi", group.by = "orig_RNA_clusters", order = TRUE, cols = mypal, label = T, label.box = F)

DimPlot(integrated_sc, reduction = "umap_totalvi", group.by = "ClusterTOTALVI_Res1", order = TRUE, cols = mypal, label = T, label.box = F)

Compare my clusters to the integrated clusters

integrated_sc$ClusterTOTALVI_Res1 = factor(integrated_sc$ClusterTOTALVI_Res1, levels = as.character(c(0:(length(unique(integrated_sc$ClusterTOTALVI_Res1))-1)))) 
integrated_sc$orig_RNA_clusters = factor(integrated_sc$orig_RNA_clusters, levels = as.character(c(1:(length(unique(integrated_sc$orig_RNA_clusters))-1)))) 

#Subset your object to keep only your data
so = integrated_sc[,integrated_sc$IGT == "IGT24"]

so$ClusterTOTALVI_Res1 = factor(so$ClusterTOTALVI_Res1, levels = as.character(c(0:(length(unique(so$ClusterTOTALVI_Res1))-1)))) 
so$orig_RNA_clusters = factor(so$orig_RNA_clusters, levels = as.character(c(1:(length(unique(so$orig_RNA_clusters))-1)))) 


tmp = table(so$orig_RNA_clusters, so$ClusterTOTALVI_Res1)
tmp3 = tmp / rowSums(tmp)
rowSums(tmp3)
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 
##  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
ColorRamp = rev(viridis(100))
tmp3[tmp3==0] = NA
labels_matrix = ifelse(is.na(tmp3), yes = "", no = as.character(signif(tmp3,2)*100))
#labels_matrix = matrix(labels_matrix, nrow(data_matrix), ncol(data_matrix))
pheatmap(mat = signif(tmp3,2)*100, cluster_rows = F, cluster_cols = F, display_numbers = labels_matrix, number_format = "%.0f", breaks = seq(0,100,length.out = length(ColorRamp)+1), col = ColorRamp, fontsize_row = 10, fontsize_col = 10, fontsize = 15, main = "Percent of cells of my original cluster (rows) in the integrated clusters (cols)", na_col = "#FDE725")

Distribution of the integrated clusters in each sample

#Subset your object to keep only your data
so = integrated_sc[,integrated_sc$IGT == "IGT24"]

df = data.frame(cluster = factor(sprintf("cl%s",so@meta.data[,"ClusterTOTALVI_Res1"]), levels = sprintf("cl%s",as.character(c(0:(length(unique(so@meta.data[,"ClusterTOTALVI_Res1"]))-1))))), 
                sample = factor(so$sample_name.1, levels = unique(so$sample_name.1)) )
tmp = table(df$cluster, df$sample)
tmp2 = t(t(tmp) / colSums(tmp)) # normalize the number of cells per sample
colSums(tmp2)
##            WT D19 SPL             WT SPL D3            WT SPL D12 
##                     1                     1                     1 
## Foxp3.p327fs SPL D 19   Foxp3.p327fs SPL D3           WT LUNG D19 
##                     1                     1                     1 
## Foxp3.p327fs LUNG D19  Foxp3.p327fs COL D19            WT COL D19 
##                     1                     1                     1 
##  Foxp3.p327fs SPL D12 
##                     1
tmp3 = tmp2
rowSums(tmp3)
##          cl0          cl1          cl2          cl3          cl4          cl5 
## 2.1614125149 0.8319734045 0.0563947200 0.0314667426 0.8707558050 0.9002874204 
##          cl6          cl7          cl8          cl9         cl10         cl11 
## 0.1297862696 0.3556738954 0.0482725344 0.5294629974 0.3371560254 0.2254185987 
##         cl12         cl13         cl14         cl15         cl16         cl17 
## 0.1570297517 0.0500024601 0.4828918076 0.0229568930 0.0446420424 0.3774682112 
##         cl18         cl19         cl20         cl21         cl22         cl23 
## 0.0236105046 0.1969946263 0.1737327810 0.0958588733 0.1450695312 0.0962772443 
##         cl24         cl25         cl26         cl27         cl28         cl29 
## 0.0678630670 0.1113715208 0.1306584938 0.4067810067 0.0174082896 0.2459507887 
##         cl30         cl31         cl32         cl33         cl34         cl35 
## 0.1226396666 0.0404857035 0.0009372071 0.4154116668 0.0000000000 0.0000000000 
##         cl36         cl37         cl38         cl39         cl40         cl41 
## 0.0086509489 0.0009372071 0.0271027158 0.0019486269 0.0118912304 0.0000000000 
##         cl42         cl43         cl44         cl45         cl46         cl47 
## 0.0101647809 0.0124234179 0.0000000000 0.0090726749 0.0082224855 0.0054828460
tmp4 = melt(tmp3)
head(tmp4)
##   Var1       Var2       value
## 1  cl0 WT D19 SPL 0.369070209
## 2  cl1 WT D19 SPL 0.145161290
## 3  cl2 WT D19 SPL 0.000000000
## 4  cl3 WT D19 SPL 0.000000000
## 5  cl4 WT D19 SPL 0.055977230
## 6  cl5 WT D19 SPL 0.003795066
tmp4$sample = df$sample[match(tmp4$Var2, df$sample)]

#pdf(file = sprintf("%s_SampleComposition_%s.pdf", prefix, res), width =30, height =20)
ColorRamp = rev(viridis(100))
tmp3[tmp3==0] = NA
labels_matrix = ifelse(is.na(tmp3), yes = "", no = as.character(signif(tmp3,2)*100))
#labels_matrix = matrix(labels_matrix, nrow(data_matrix), ncol(data_matrix))
pheatmap(mat = signif(tmp3,2)*100, cluster_rows = F, cluster_cols = F, display_numbers = labels_matrix, number_format = "%.0f", breaks = seq(0,100,length.out = length(ColorRamp)+1), col = ColorRamp, fontsize_row = 10, fontsize_col = 10, fontsize = 15, main = "Percent of cells in each sample", na_col = "#FDE725")

ggplot(tmp4) + geom_point(aes(Var1, value, color = sample), size = I(3), alpha = 0.7) + scale_color_manual(values = mypal) + theme_bw()+ ggtitle(label = "Cluster composition in each Sample") + theme(axis.text.x  = element_text(angle=75, vjust=0.5, size=10)) + facet_wrap(~sample) + NoLegend()

#dev.off()

Plot genes and proteins

#normalize the ADT and RNA data before plotting
so = NormalizeData(so,assay = "ADT", normalization.method = "CLR", verbose = T) 
## Normalizing across features
so = NormalizeData(so,assay = "RNA", normalization.method = "LogNormalize", verbose = T)


NormalizeData(integrated_sc,assay = "ADT", normalization.method = "CLR", verbose = T) 
## Normalizing across features
## An object of class Seurat 
## 55667 features across 29928 samples within 2 assays 
## Active assay: RNA (55487 features, 0 variable features)
##  2 layers present: counts, data
##  1 other assay present: ADT
##  1 dimensional reduction calculated: umap_totalvi
DefaultAssay(integrated_sc) = "ADT"

FeaturePlot(integrated_sc, features = "CD62L", raster = T, slot = "data")

FeaturePlot(integrated_sc, features = "TCRGD", raster = T, slot = "data")

Gene expression differences

library("EnhancedVolcano")
## Loading required package: ggrepel
Idents(so) = so$IGT

markers = FindMarkers(so, ident.1 = "10", ident.2 = "25", group.by = "ClusterTOTALVI_Res1", subset.ident = "IGT24")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "10 vs 25 in IGT24")

markers = FindMarkers(so, ident.1 = "10", ident.2 = "0", group.by = "ClusterTOTALVI_Res1", subset.ident = "IGT24")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "10 vs 0 in IGT24")

integrated_sc$clustertest = "others"
integrated_sc$clustertest[integrated_sc$ClusterTOTALVI_Res1 %in% c("40", "46", "26", "24", "19")] = "BottomRight"
Idents(integrated_sc) = integrated_sc$clustertest
DefaultAssay(integrated_sc) = "RNA"
tmp = integrated_sc[,integrated_sc$IGT != "IGT24"]
markers = FindMarkers(tmp, ident.1 = "BottomRight", group.by = "clustertest")
EnhancedVolcano(markers, lab = rownames(markers), x = 'avg_log2FC', y = 'p_val', subtitle = "", title = "BottomRight")
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...

tmp = NormalizeData(tmp,assay = "RNA", normalization.method = "LogNormalize", verbose = T)
FeaturePlot(tmp, features = "Tox", raster = T)

FeaturePlot(tmp, features = "Dock2", raster = T)